# -*- coding: utf-8 -*-
"""
Created on Mon Jul 20 15:43:53 2020

@author: freelon
"""

import pandas as pd

#some of the column names in the function below may need to be changed depending on how the tweets were hydrated

def get_top_rted(dataset, n=100, user_col='screen_name', rt_col='retweet_count'):
    """Return the ``n`` users whose tweets received the most retweets.

    Parameters
    ----------
    dataset : pandas.DataFrame
        One row per tweet. Must contain the columns named by ``user_col``
        and ``rt_col``; ``rt_col`` must be numeric.
    n : int, default 100
        Maximum number of users to return.
    user_col : str, default 'screen_name'
        Column holding the tweet author's user name. Override this when the
        hydration tool used a different column name.
    rt_col : str, default 'retweet_count'
        Column holding each tweet's retweet count.

    Returns
    -------
    pandas.DataFrame
        Columns ``username`` and ``rt_count`` (the sum of ``rt_col`` over all
        of that user's rows), sorted by ``rt_count`` in descending order and
        truncated to ``n`` rows. Only users with at least one row whose
        retweet count is positive are included. The index holds the label of
        each user's first such row in ``dataset``.

    Notes
    -----
    Rows whose user name is missing (NaN) are ignored by the aggregation, so
    such a "user" would get a NaN total.
    """
    # Only users with at least one retweeted tweet are ranked.
    retweeted = dataset[dataset[rt_col] > 0]
    uniq_users = retweeted[user_col].drop_duplicates()

    # Aggregate every user's total in a single grouped pass instead of
    # re-filtering the whole frame once per user.
    totals = dataset.groupby(user_col, sort=False)[rt_col].sum()

    top_rts = uniq_users.to_frame(name='username')
    # Positional assignment: both series share uniq_users' row order, so no
    # index alignment is needed (and duplicate index labels stay harmless).
    top_rts['rt_count'] = uniq_users.map(totals).values

    # A stable sort keeps tied users in first-appearance order.
    top_rts = top_rts.sort_values('rt_count', ascending=False, kind='mergesort')
    return top_rts.iloc[:n]

# Load the two hydrated tweet collections. keep_default_na=False stops pandas
# from converting its default NA markers (e.g. "NA", empty cells) into NaN.
pland = pd.read_csv("plandemic_tweets.csv", keep_default_na=False)
trump_anon = pd.read_csv("anon_trump_tweets.csv", keep_default_na=False)

# Rank the most-retweeted users in each collection (top 100 by default).
pland_top_rted = get_top_rted(pland)
trump_anon_top_rted = get_top_rted(trump_anon)

# Show the 20 most-retweeted users of each collection, separated by a blank line.
print(pland_top_rted.head(20), "\n")
print(trump_anon_top_rted.head(20))
